{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Hybrid Colpali RAG using [VARAG](https://github.com/adithya-s-k/VARAG)\n",
    "\n",
    "\n",
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/adithya-s-k/VARAG/blob/main/docs/hybridColpaliRAG.ipynb)\n",
    "\n",
    "**Requirement:** a GPU runtime is needed to run this notebook (NVIDIA T4 at minimum)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Clone and enter the repository only when the kernel is not already inside it,\n",
    "# so re-running this cell does not create a nested VARAG/VARAG checkout.\n",
    "if os.path.basename(os.getcwd()) != \"VARAG\":\n",
    "    if not os.path.isdir(\"VARAG\"):\n",
    "        !git clone https://github.com/adithya-s-k/VARAG\n",
    "    %cd VARAG\n",
    "%pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install the poppler-utils system package (PDF command-line tools).\n",
    "!apt-get update && apt-get install -y poppler-utils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Editable install of the package in the current directory into the kernel's environment.\n",
    "%pip install --editable ."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sentence_transformers import SentenceTransformer\n",
    "from varag.rag import HybridColpaliRAG\n",
    "from varag.llms import OpenAI\n",
    "from varag.utils import get_model_colpali\n",
    "import lancedb\n",
    "import os\n",
    "from dotenv import load_dotenv\n",
    "from getpass import getpass\n",
    "\n",
    "# Load OPENAI_API_KEY from a .env file first. load_dotenv() does not override\n",
    "# variables that are already set, so assigning a placeholder before this call\n",
    "# would silently win over the real key stored in .env.\n",
    "load_dotenv()\n",
    "\n",
    "# Never hardcode the key in the notebook: prompt for it only when it is still missing.\n",
    "if not os.environ.get(\"OPENAI_API_KEY\"):\n",
    "    os.environ[\"OPENAI_API_KEY\"] = getpass(\"OpenAI API key: \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tunable identifiers used by this cell, gathered in one place.\n",
    "DB_URI = \"~/shared_rag_db\"\n",
    "COLPALI_CHECKPOINT = \"vidore/colpali-v1.2\"\n",
    "IMAGE_EMBEDDING_CHECKPOINT = \"jinaai/jina-clip-v1\"\n",
    "TABLE_NAME = \"hybridColpaliDemo\"\n",
    "\n",
    "# Connect to the LanceDB database at DB_URI.\n",
    "shared_db = lancedb.connect(DB_URI)\n",
    "\n",
    "# Load the ColPali model/processor pair and the image embedding model.\n",
    "model, processor = get_model_colpali(COLPALI_CHECKPOINT)\n",
    "embedding_model = SentenceTransformer(\n",
    "    IMAGE_EMBEDDING_CHECKPOINT, trust_remote_code=True\n",
    ")\n",
    "\n",
    "# Wire the models and the database into the hybrid RAG pipeline.\n",
    "colpali_hybrid_rag = HybridColpaliRAG(\n",
    "    colpali_model=model,\n",
    "    colpali_processor=processor,\n",
    "    db=shared_db,\n",
    "    image_embedding_model=embedding_model,\n",
    "    table_name=TABLE_NAME,\n",
    ")\n",
    "\n",
    "# LLM client that is queried with the retrieved images in a later cell.\n",
    "vlm = OpenAI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index the files in ./examples/data.\n",
    "colpali_hybrid_rag.index(\n",
    "    \"./examples/data\",\n",
    "    overwrite=False,\n",
    "    recursive=False,\n",
    "    verbose=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"What is Colpali\"\n",
    "num_results = 5\n",
    "\n",
    "# Pass num_results instead of a second hardcoded 5 so the two values cannot drift apart.\n",
    "results = colpali_hybrid_rag.search(query, k=num_results)\n",
    "\n",
    "# Keep only the image of each hit; these are sent to the LLM in the next cell.\n",
    "images = [result[\"image\"] for result in results]\n",
    "\n",
    "# Display the retrieved images, numbered from 1\n",
    "for i, img in enumerate(images, 1):\n",
    "    print(f\"Image {i}:\")\n",
    "    display(img)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import Markdown, display\n",
    "\n",
    "# Send the query together with the retrieved images to the LLM,\n",
    "# then render its reply as Markdown.\n",
    "response = vlm.query(query, images, max_tokens=1000)\n",
    "display(Markdown(response))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Run Gradio Demo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Change into examples/ inside the subshell only: a bare `%cd examples` permanently\n",
    "# moves the kernel's working directory and fails when this cell is run a second time.\n",
    "!cd examples && python hybridColpaliDemo.py --share"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
